import pandas as pd
import plotly.express as px
# Load the dataset
df = pd.read_csv("netflix_titles.csv")
# Initialize the "is_interesting" column (the plot below does not read it)
df["is_interesting"] = False
# 'duration' is text in the CSV; keep only its leading digits as a float so
# the y-axis is a continuous numeric scale instead of one category per string.
# NOTE(review): the axis label says minutes - confirm every row's duration is
# actually expressed in minutes.
df["duration"] = df["duration"].str.extract(r"(\d+)", expand=False).astype(float)
# Create a scatter plot of duration against release year, coloured by type
fig = px.scatter(
    df,
    x='release_year',
    y='duration',
    color='type',
    title='Netflix Movies and TV Shows',
    labels={'release_year': 'Release Year', 'duration': 'Duration (minutes)'},
    hover_data={'title': True},
)
# Display the plot
fig.show()
# Load the dataset first to inspect its structure before adapting the provided code
import pandas as pd
# Read the Netflix catalogue
netflix_df = pd.read_csv('netflix_titles.csv')
# Count the titles carrying each rating and draw the counts as bars
rating_freq = netflix_df['rating'].value_counts()
ax = rating_freq.plot.bar(color='skyblue')
ax.set_title('Netflix Content by Rating', color='black', fontsize=15)
ax.set_xlabel('Rating', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
# Write each bar's height just above the bar, centred horizontally
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_middle = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_height:.0f}',
        (bar_middle, bar_height),
        ha='center',
        va='center',
        xytext=(0, 10),
        textcoords='offset points',
    )
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display
# Load the dataset
netflix_df = pd.read_csv('netflix_titles.csv')


def _split_countries(value):
    """Split a comma-separated country string into a list of trimmed names.

    Empty pieces (from a leading, trailing or doubled comma) are dropped.
    Values that are not strings, such as missing entries, are returned as-is.
    """
    if not isinstance(value, str):
        return value
    return [name.strip() for name in value.split(',') if name.strip()]


# Prepare the data: one row per (title, country), then count titles per
# country and release year
netflix_df['country'] = netflix_df['country'].apply(_split_countries)
country_data = netflix_df.explode('country')
country_release_counts = (
    country_data.groupby(['country', 'release_year'])
    .size()
    .reset_index(name='counts')
)
# Keep only the ten countries with the largest overall title count
top_countries = (
    country_release_counts.groupby('country')['counts'].sum().nlargest(10).index
)
top_country_data = country_release_counts[
    country_release_counts['country'].isin(top_countries)
]
# Create an output widget to hold the plot
plot_output = widgets.Output()
# Main function to create heatmap
def create_heatmap(order_by='total_count'):
    """Render the country-by-release-year heatmap into ``plot_output``.

    Args:
        order_by: Row ordering. ``'country'`` sorts the country names
            alphabetically; ``'total_count'`` sorts by summed title count,
            largest first.

    Raises:
        ValueError: If ``order_by`` is neither of the supported values.
    """
    if order_by == 'country':
        country_order = sorted(top_country_data['country'].unique())
    elif order_by == 'total_count':
        country_order = (
            top_country_data.groupby('country')['counts']
            .sum()
            .sort_values(ascending=False)
            .index
        )
    else:
        # Previously an unknown value fell through and failed later with an
        # unbound-variable error; fail early with a clear message instead.
        raise ValueError(
            f"order_by must be 'country' or 'total_count', got {order_by!r}"
        )
    ordered_data = top_country_data[top_country_data['country'].isin(country_order)]
    # Keyword arguments: positional index/columns/values are not accepted by
    # DataFrame.pivot in pandas 2.x.
    ordered_pivot = (
        ordered_data.pivot(index='country', columns='release_year', values='counts')
        .reindex(country_order)
        .fillna(0)  # year/country pairs with no rows become zero counts
        .astype(int)  # integer cells are required by fmt="d" below
    )
    with plot_output:
        plot_output.clear_output(wait=True)  # Clear the previous plot
        plt.figure(figsize=(20, 8))
        sns.heatmap(ordered_pivot, cmap="YlGnBu", linewidths=.5, annot=True, fmt="d")
        plt.title('Number of Titles Released per Year for Top 10 Countries on Netflix')
        plt.ylabel('Country')
        plt.xlabel('Release Year')
        plt.xticks(rotation=45)
        plt.yticks(rotation=0)
        plt.tight_layout()
        plt.show()
# Dropdown that selects the heatmap row ordering; starts on 'Total Count'
order_by_widget = widgets.Dropdown(
    description='Order by:',
    options=['Country', 'Total Count'],
    value='Total Count',
    disabled=False,
)
# Event handler for the dropdown: redraw the heatmap for the new selection
def on_order_change(change):
    """Translate the selected dropdown label into an ``order_by`` key and redraw.

    The label is lower-cased and its spaces replaced by underscores before
    being passed to ``create_heatmap``.
    """
    selected_label = change.new
    order_key = selected_label.lower().replace(' ', '_')
    create_heatmap(order_by=order_key)


order_by_widget.observe(on_order_change, names='value')
# Show the dropdown with the plot area, then draw the first heatmap
display(order_by_widget, plot_output)
create_heatmap()  # Generate the initial heatmap
# Captured notebook output: Dropdown(description='Order by:', index=1, options=('Country', 'Total Count'), value='Total Count')
# Captured notebook output: Output()
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
from collections import Counter
# Load the data and count how many titles list each country
df = pd.read_csv('netflix_titles.csv')
# Split on bare commas and trim each piece, so entries with irregular spacing
# or a leading/trailing comma do not yield padded or empty country names.
country_counts = Counter(
    name
    for entry in df['country'].dropna()
    for name in (piece.strip() for piece in entry.split(','))
    if name
)
country_counts_df = pd.DataFrame(
    list(country_counts.items()), columns=['Country', 'Count']
)
# Build a one-row, two-column figure whose cells both hold choropleth maps
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "choropleth"}, {"type": "choropleth"}]],
    subplot_titles=("Electric Color Scale", "Picnic Color Scale"),
    horizontal_spacing=0.02,
)
# The two maps show the same data; only the colour scale and the horizontal
# position of the colour bar differ.
map_variants = [('Electric', 0.46), ('Picnic', 1)]
for column_number, (scale_name, colorbar_x) in enumerate(map_variants, start=1):
    country_map = go.Choropleth(
        locations=country_counts_df['Country'],
        z=country_counts_df['Count'],
        locationmode='country names',
        colorscale=scale_name,
        colorbar=dict(len=0.45, x=colorbar_x, title="Count"),
    )
    fig.add_trace(country_map, row=1, col=column_number)
# Set the overall title and figure height
fig.update_layout(
    title_text="Number of Netflix Titles by Country with Different Color Scales",
    height=400,
)
# Display the figure
fig.show()
#abstract/#elaborate
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from mpl_toolkits.mplot3d import Axes3D
# Load the dataset
df = pd.read_csv("netflix_titles.csv")
# Preprocess duration data: keep the first run of digits in each value and
# convert it to float (values without digits become NaN).
# The pattern is a raw string so that \d is not an invalid escape sequence,
# and expand=False yields a Series rather than a one-column DataFrame.
df['duration'] = df['duration'].str.extract(r'(\d+)', expand=False).astype(float)
# 3D scatter of a random sample of the titles
def plot_scatter_3d(granularity):
    """Draw a 3D scatter of a random sample of ``df``.

    Args:
        granularity: Fraction of the rows of ``df`` to sample; the sample
            size is ``int(len(df) * granularity)``.

    The axes are release year (x), position within the sample (y) and
    duration (z).
    """
    sample_size = int(len(df) * granularity)
    subset = df.sample(sample_size)
    positions = range(len(subset))

    figure = plt.figure(figsize=(10, 6))
    axes = figure.add_subplot(111, projection='3d')
    axes.scatter(subset['release_year'], positions, subset['duration'])

    axes.set_xlabel('Release Year')
    axes.set_ylabel('Index of Netflix Titles')
    axes.set_zlabel('Duration (minutes)')
    axes.set_title('3D Scatter Plot with Adjustable Granularity')
    plt.grid(True)
    plt.show()
# Slider for the sampled fraction: 0.1 to 1 in steps of 0.1, starting at 0.5
granularity_slider = widgets.FloatSlider(
    value=0.5,
    min=0.1,
    max=1,
    step=0.1,
    description='Granularity:',
)
# Re-run the 3D plot whenever the slider moves
widgets.interactive(plot_scatter_3d, granularity=granularity_slider)
import pandas as pd
import plotly.express as px
# Load the Netflix dataset
netflix_data = pd.read_csv('netflix_titles.csv')
# 'duration' is text in the CSV; keep its leading digits as a float so the
# y-axis is numeric. The zoom handler for this figure does arithmetic on the
# y value, which only works on numbers.
netflix_data['duration'] = (
    netflix_data['duration'].str.extract(r'(\d+)', expand=False).astype(float)
)
# Create a scatter plot with title, rating and genre shown on hover
sc1 = px.scatter(
    netflix_data,
    x='release_year',
    y='duration',
    hover_data=['title', 'rating', 'listed_in'],
)
def update_zoom(trace, points, selector):
    """Zoom the ``sc1`` figure onto the first point of a click event.

    The x-axis range becomes the point's x +/- 1 and the y-axis range its
    y +/- 10, with autorange switched off on both axes.

    Args:
        trace: Unused; part of the callback signature.
        points: Object carrying the selected coordinates. ``xs``/``ys`` (the
            attribute names on plotly's callback ``Points``) are read first,
            falling back to ``x``/``y``.
        selector: Unused; part of the callback signature.

    Returns:
        None. Does nothing when the event carries no points.

    NOTE(review): nothing in the visible code registers this handler on a
    trace - confirm where it is attached.
    """
    xs = getattr(points, 'xs', None)
    if xs is None:
        xs = getattr(points, 'x', ())
    ys = getattr(points, 'ys', None)
    if ys is None:
        ys = getattr(points, 'y', ())
    # An event with no points has no first point to zoom onto.
    if not xs or not ys:
        return
    x = xs[0]
    y = ys[0]
    sc1.update_layout(
        xaxis=dict(
            range=[x - 1, x + 1],
            type='linear',
            autorange=False
        ),
        yaxis=dict(
            range=[y - 10, y + 10],
            type='linear',
            autorange=False
        )
    )
# "Reset Zoom" button: re-enables autorange on both axes via a layout update
reset_layout = {'xaxis.autorange': True, 'yaxis.autorange': True}
reset_button = dict(
    label='Reset Zoom',
    method='update',
    args=[{}, reset_layout],  # first dict: no trace changes; second: layout
)
button_menu = dict(type='buttons', showactive=False, buttons=[reset_button])
sc1.update_layout(updatemenus=[button_menu])
sc1.show()
import pandas as pd
import matplotlib.pyplot as plt
# Read the Netflix catalogue into the DataFrame used by the genre plots
df = pd.read_csv(filepath_or_buffer="netflix_titles.csv")
# Function to filter and plot data based on selected genre
def plot_by_genre(genre):
    """Plot a histogram of release years for the titles matching *genre*.

    Args:
        genre: ``'All'`` to use every row of ``df``; otherwise only rows
            whose ``listed_in`` text contains this string are kept. The
            match is case-insensitive and literal, and rows with a missing
            ``listed_in`` are excluded.
    """
    if genre == 'All':
        filtered_df = df
    else:
        # regex=False: the genre is a plain label, so any regex
        # metacharacters in it must be matched literally.
        matches = df['listed_in'].str.contains(
            genre, case=False, na=False, regex=False
        )
        filtered_df = df[matches]
    plt.figure(figsize=(10, 6))
    plt.hist(filtered_df['release_year'], bins=20, color='skyblue', edgecolor='black')
    plt.xlabel('Release Year')
    plt.ylabel('Frequency')
    plt.title(f'Distribution of Release Year for Genre: {genre}')
    plt.grid(True)
    plt.show()
# Create dropdown menu for selecting genre.
# 'listed_in' holds comma-separated genre lists, so split them and offer each
# distinct genre once (sorted) instead of every distinct combination.
genre_names = sorted({
    name.strip()
    for entry in df['listed_in'].dropna()
    for name in entry.split(',')
    if name.strip()
})
genre_dropdown = widgets.Dropdown(options=['All'] + genre_names, description='Genre:')
widgets.interactive(plot_by_genre, genre=genre_dropdown)
# Create a slider to adjust the year range, initially spanning all years.
# The bounds are cast to plain ints so the widget is not handed numpy scalars.
first_year = int(df['release_year'].min())
last_year = int(df['release_year'].max())
year_range = widgets.IntRangeSlider(
    min=first_year,
    max=last_year,
    value=[first_year, last_year],
    description='Year Range:',
)
# Update the plot based on year range and genre selection
def update_plot(genre, year_range):
    """Plot a histogram of release years filtered by year range and genre.

    Args:
        genre: ``'All'`` to skip genre filtering; otherwise only rows whose
            ``listed_in`` text contains this string are kept. The match is
            case-insensitive and literal, and rows with a missing
            ``listed_in`` are excluded.
        year_range: Pair ``(first, last)``; rows whose ``release_year`` lies
            within the pair, bounds included, are kept.
    """
    first_year, last_year = year_range[0], year_range[1]
    in_range = (df['release_year'] >= first_year) & (df['release_year'] <= last_year)
    filtered_df = df[in_range]
    if genre != 'All':
        # regex=False: the genre is a plain label, so any regex
        # metacharacters in it must be matched literally.
        matches = filtered_df['listed_in'].str.contains(
            genre, case=False, na=False, regex=False
        )
        filtered_df = filtered_df[matches]
    plt.figure(figsize=(10, 6))
    plt.hist(filtered_df['release_year'], bins=20, color='skyblue', edgecolor='black')
    plt.xlabel('Release Year')
    plt.ylabel('Frequency')
    plt.title(f'Distribution of Release Year for Genre: {genre}')
    plt.grid(True)
    plt.show()


# Connect the widgets to the update function
widgets.interactive(update_plot, genre=genre_dropdown, year_range=year_range)
# Captured notebook output (truncated): interactive(children=(Dropdown(description='Genre:', options=('All', 'Documentaries', 'International TV Shows,…
import pandas as pd
import plotly.express as px
# Load the dataset
netflix_data = pd.read_csv('netflix_titles.csv')
# Convert 'release_year' to numeric; values that cannot be parsed become NaN
netflix_data['release_year'] = pd.to_numeric(netflix_data['release_year'], errors='coerce')
# Drop rows with NaN in 'release_year' after conversion. The explicit copy
# makes the column assignments below write to an independent frame, which
# avoids pandas' chained-assignment warning.
netflix_data_clean = netflix_data.dropna(subset=['release_year']).copy()
# Bin 'release_year' into decades: edges 1900, 1910, ..., 2030, with labels
# '1900s' ... '2020s' generated from the left edge of each bin
bins = list(range(1900, 2031, 10))
labels = [f'{decade_start}s' for decade_start in bins[:-1]]
# right=False: each bin includes its left edge and excludes its right edge
netflix_data_clean['decade'] = pd.cut(
    netflix_data_clean['release_year'], bins=bins, labels=labels, right=False
)
# Ensure 'type' and 'rating' are treated as categorical
netflix_data_clean['type'] = netflix_data_clean['type'].astype('category')
netflix_data_clean['rating'] = netflix_data_clean['rating'].astype('category')
# Keep only the three plotted columns and drop rows missing any of them
data_for_visualization = netflix_data_clean[['type', 'decade', 'rating']].dropna()
# Parallel-categories diagram linking content type, release decade and rating
plotted_dimensions = ['type', 'decade', 'rating']
axis_titles = {
    'type': 'Content Type',
    'decade': 'Release Decade',
    'rating': 'Rating',
}
fig = px.parallel_categories(
    data_for_visualization,
    dimensions=plotted_dimensions,
    color_continuous_scale=px.colors.sequential.Inferno,
    labels=axis_titles,
)
# Render the figure
fig.show()
# Import necessary libraries
import pandas as pd
import plotly.express as px
# Load the Netflix dataset
netflix_data = pd.read_csv('netflix_titles.csv')
# The radial axis needs a quantity: count the titles per (type, rating) pair
# rather than placing the rating strings themselves on the radius.
rating_counts = (
    netflix_data.dropna(subset=['type', 'rating'])
    .groupby(['type', 'rating'])
    .size()
    .reset_index(name='count')
)
# Create a polar line chart: one closed line per content type, ratings around
# the circle, number of titles as the radius
fig3 = px.line_polar(
    rating_counts,
    r='count',
    theta='rating',
    color='type',
    line_close=True,
)
# Show the polar line chart
fig3.show()